##################################################################
# /* Author: Gautam Nair Nicholas Sambanis */
# /* Project: Nationalism Ethnic Identification */
# /* Description: Heterogeneous Effects Analysis with Benjamini Hochberg Correction */

##################################################################
# loading packages
##################################################################
library(Hmisc)
library(foreign)
library(sandwich)
library(lmtest)
library(numDeriv)
library(stargazer)
library(ggplot2)
library(plyr)
library(gridExtra)
library(dplyr)
library(scales)
library(stats)

##################################################################
#  Violence vs Control Heterogeneous Effects on National Identification index BH Correction on Full Model
##################################################################

rm(list=ls())
data.working <- read.csv("nei_kashmir_replication_dataset.csv")

ind.var <- c("group2_violence")

data.working <- data.working[data.working$group1_control==1 | data.working$group2_violence==1,]

vars <- c("median_age",               
				"gender_female",		
				"education_class12",
				"religion_sunni",
				"main_earner",
				"high_index_income_wealth1",
				"occupation_govt",
				"ic_high_national_id_acc"
)

treatvar <- c("group2_violence")

data.working$i_median_age <- data.working$group2_violence*data.working$median_age
data.working$i_gender_female <- data.working$group2_violence*data.working$gender_female
data.working$i_education_class12 <- data.working$group2_violence*data.working$education_class12
data.working$i_religion_sunni <- data.working$group2_violence*data.working$religion_sunni
data.working$i_main_earner <- data.working$group2_violence*data.working$main_earner
data.working$i_high_index_income_wealth1 <- data.working$group2_violence*data.working$high_index_income_wealth1
data.working$i_occupation_govt <- data.working$group2_violence*data.working$occupation_govt
data.working$i_ic_high_national_id_acc <- data.working$group2_violence*data.working$ic_high_national_id_acc

ivars <- c("i_median_age",               
	"i_gender_female",		
	"i_education_class12",
	"i_religion_sunni",
	"i_main_earner",
	"i_high_index_income_wealth1",
	"i_occupation_govt",
	"i_ic_high_national_id_acc"
)

spec1 <- c(treatvar)
spec2 <- c(treatvar, vars)
spec3 <- c(treatvar, vars, ivars)

my.formula <-paste("ic_index_id_no_ind_pak",'~', paste(spec1, collapse= ' + '))
temp.model.1 <- lm(my.formula, data=data.working)
temp.se.1 <- coeftest(temp.model.1, vcov=vcovHC(temp.model.1,type="HC2"))[,2]

my.formula <-paste("ic_index_id_no_ind_pak",'~', paste(spec2, collapse= ' + '))
temp.model.2 <- lm(my.formula, data=data.working)
temp.se.2 <- coeftest(temp.model.2, vcov=vcovHC(temp.model.2,type="HC2"))[,2]

my.formula <-paste("ic_index_id_no_ind_pak",'~', paste(spec3, collapse= ' + '))
temp.model.3 <- lm(my.formula, data=data.working)
temp.se.3 <- coeftest(temp.model.3, vcov=vcovHC(temp.model.3,type="HC2"))[,2]

varlabels <- c(
"Violence Exposure", 
"Age Above Median", 
"Female",
"Completed High School", 
"Sunni Muslim", 
"Main Earner", 
"Income and Wealth Above Median", 
"Government Occupation",
"Constituency National Identification Above Median", 
"Age x Violence",
"Female x Violence",
"High School x Violence",
"Sunni Muslim x Violence",
"Main Earner x Violence",
"Income and Wealth x Violence", 
"Government Occupation x Violence",
"Constituency National Identification x Violence",
"Constant")

temp.coefs <- temp.model.3$coefficients
names(temp.coefs) <-NULL
bh.data <- as.data.frame(cbind(temp.coefs, temp.se.3))
bh.data$varlabels <- varlabels
t.vals <- abs(bh.data$temp.coefs/bh.data$temp.se.3)
bh.data$t.vals <- t.vals
dfs <- nrow(data.working)-(length(varlabels))
bh.data$p.val <- 2*pt(-abs(t.vals), df=dfs)
bh.data$p.val.adj.1 <- round(p.adjust(bh.data$p.val, "BH"), 3)
bh.pvals <-bh.data$p.val.adj.1
bh.pvals <- setNames(bh.pvals, names(temp.se.3))
#bh.pvals <- list(bh.pvals)

spectitle <- c("Heterogeneous Treatment Effects of Violence Exposure on National Identification Index (Benjamini-Hochberg Adjusted p-values)")
output.file <- c("tf_t_13_violence_cate_bh_pvals")
temptype <- c("latex", "text")
tempext <- c(".tex", ".txt")

for(q in 1:length(temptype)){
	temp.output <- paste(output.file, tempext[q], sep="")
 stargazer(temp.model.3, se=list(temp.se.3),
  			p=list(bh.pvals),
			type= temptype[q],
 			report=c("vcs*p"),
 			single.row=TRUE,
         title= spectitle,
          out=temp.output,
          no.space=TRUE, 
          model.numbers= TRUE,
         notes.append=FALSE,
          align=FALSE,
         covariate.labels=varlabels,
          header= FALSE,
          font.size="small",
          model.names=FALSE,
          digits=2,
          star.char=c("**", "***"),
          star.cutoffs=c(0.05, 0.01),
          omit.stat = c("rsq", "f","adj.rsq"),
          column.labels  = NULL,
          dep.var.caption  = "",
          dep.var.labels.include=FALSE,
         df=FALSE,
         omit.table.layout="n",
         nobs=TRUE
        # order= c(spec3, "(Intercept)")
)
}
##################################################################
#  Violence vs Control Heterogeneous Effects on National Identification index BH Correction by subgroup
################################################################## 

rm(list=ls())
data.working <- read.csv("nei_kashmir_replication_dataset.csv")

vars <- c("median_age",               
				"gender_female",		
				"education_class12",
				"religion_sunni",
				"main_earner",
				"high_index_income_wealth1",
				"occupation_govt",
				"ic_high_national_id_acc"
)

vars.labels <- c(
"Age Above Median", 
"Female",
"Completed High School", 
"Sunni Muslim", 
"Main Earner", 
"Income and Wealth Above Median", 
"Government Occupation",
"Constituency National ID Above Median")

table.titles <- c("Split Sample Tests of Heterogeneous Treatment Effects of Violence Exposure on National Identification Index (Benjamini-Hochberg Adjusted p-values)")

filenames <- c("tf_t_14_violence_cate_bh_split_samples")

first.row <- c("Dummy Variable taking values 1/0",
"CATE (1)",
"CATE (0)",
"Diff. p-val.",
"Diff. p-val. (BH adj.)",
"Control Mean (1)",
"Control Mean (0)"
)

dep.var <- c("ic_index_id_no_ind_pak")

treatvar <- c("group2_violence")

varslength <- length(vars)
decimals <- 2

temp.tables.list <- vector("list", length(dep.var))

for(y in 1:length(dep.var)){

		data.subset <- data.working[data.working$group1_control==1 | data.working$group2_violence==1,]

	control.mean <- control.mean.se <- cates <- cate.se <- cate.se.stars <-  pvalue <-  N <- p.val.f <- p.val.f.noround <- p.val.f.bh <- rep(NA, varslength)
	
	for (x in c(1, 0)){
		for (i in 1:length(vars)){ 
			index = which(data.subset[,vars[i]]==x)
			tempdata = data.subset[index,]
	
			my.formula <-paste(dep.var[y],'~', paste(treatvar, collapse= ' + '))
			reg.model <- lm(my.formula, data=tempdata)
			cov <- vcovHC(reg.model, type = "HC")
			robustse <- sqrt(diag(cov))
	
			control.mean[i] <- format(round(reg.model$coefficients[1], decimals), nsmall=decimals)
			controlse <- format(round(robustse[1], decimals), nsmall=decimals)
			
			my.formula <-paste(dep.var[y],'~', paste(treatvar, collapse= ' + '))
			ttest <- t.test(tempdata[,dep.var[y]]~tempdata[,treatvar], data=tempdata)
			pvalue[i] <- format(round(ttest$p.value, decimals), nsmall=decimals)
		
			cates[i] <- reg.model$coefficients[2]
			cate.se <- robustse[2]
			cate <- reg.model$coefficients[2]
			cate <- format(round(cate, decimals), nsmall=decimals)
			catese <- format(round(robustse[2], decimals), nsmall=decimals)
	
			cate.se.stars[i] <- paste(cate," ", "(", catese, ")", sep="")
			
			N[i] <- nrow(tempdata)
			
			# F-test of difference
			data.temp <- cbind(data.subset[,treatvar], data.subset[,dep.var[y]], data.subset[,vars[i]])
			Z <- data.temp[,1]
			Y <- data.temp[,2]
			X <- data.temp[,3]
			
			f.pvalue <- anova(lm(Y~X*Z))[3,5]
			p.val.f[i] <-format(round(f.pvalue, decimals), nsmall=decimals)
			p.val.f.noround[i] <-f.pvalue
		}
		if (x==1) {
			cates.1<- cbind (cate.se.stars)
			control.mean.1 <- control.mean
			N.1 <- N
		}
		if (x==0) {
			cates.0<- cbind (cate.se.stars)
			control.mean.0 <- control.mean
			N.0 <-N
		}
	}
	p.val.f.bh <- round(p.adjust(p.val.f.noround, "BH"), 2)
	#temp.table <- cbind(vars.labels, cates.1, cates.0, p.val.f, control.mean.1, control.mean.0, N.1, N.0)
	temp.table <- cbind(vars.labels, cates.1, cates.0, p.val.f, p.val.f.bh, control.mean.1, control.mean.0)
	temp.table <- as.data.frame(temp.table)
	colnames(temp.table) <- first.row
	temp.tables.list[[y]] <- temp.table
	temptype <- c("latex", "text")
	tempext <- c(".tex", ".txt")
	output.file <- filenames[y]
	for(q in 1:length(temptype)){
		temp.output <- paste(output.file, tempext[q], sep="")
		stargazer(temp.table, 
		title = table.titles[y],
		out = temp.output,
		font.size = "footnotesize",
		summary=FALSE, 
		rownames=FALSE,
		type= temptype[q])
	}
}
